/**
 * Copyright © 2023-2030 The ruanrongman Authors
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package top.rslly.iot.utility.ai.voice.concentus;

class MDCT {

  /**
   * Fixed-point forward MDCT.
   *
   * <p>
   * The transform runs in four stages: the input is windowed and folded into a scratch buffer,
   * each folded pair is rotated by the twiddle table and scaled while being stored in bit-reversed
   * order, {@link KissFFT#opus_fft_impl} is run on that scratch buffer, and the result is rotated
   * once more while being interleaved into {@code output} from both ends. This method only reads
   * from {@code input}; all intermediate data lives in locally allocated arrays.
   *
   * @param l lookup providing the base size {@code n}, the twiddle table {@code trig} and the
   *        per-shift FFT states {@code kfft}
   * @param input source samples
   * @param input_ptr offset of the first sample inside {@code input}
   * @param output destination for the coefficients
   * @param output_ptr offset of the first coefficient inside {@code output}
   * @param window window coefficients applied at both ends of the folded block
   * @param overlap overlap length; controls how many folded pairs are windowed
   * @param shift number of times the base size {@code l.n} is halved; also selects
   *        {@code l.kfft[shift]}
   * @param stride distance between consecutive coefficients written to {@code output}
   */
  static void clt_mdct_forward(MDCTLookup l, int[] input, int input_ptr, int[] output,
      int output_ptr,
      int[] window, int overlap, int shift, int stride) {
    final FFTState fft = l.kfft[shift];
    final int fftScaleShift = fft.scale_shift - 1;
    /* Kept as an int so the same fixed-point multiply overload is used below. */
    final int fftScale = fft.scale;

    /* Every shift level halves the size and skips that level's slice of the twiddle table. */
    int size = l.n;
    final short[] twiddles = l.trig;
    int twBase = 0;
    for (int level = 0; level < shift; level++) {
      size >>= 1;
      twBase += size;
    }
    final int half = size >> 1;
    final int quarter = size >> 2;

    final int[] folded = new int[half];
    final int[] work = new int[quarter * 2];

    /* Consider the input to be composed of four blocks: [a, b, c, d] */
    /* Stage 1: window, shuffle, fold. */
    final int halfOverlap = overlap >> 1;
    final int edgePairs = (overlap + 3) >> 2;
    /* One read cursor walks up from upBase, the other walks down from downBase, 2 per pair. */
    final int upBase = input_ptr + halfOverlap;
    final int downBase = input_ptr + half - 1 + halfOverlap;
    int pair = 0;

    /* Leading pairs: real part arranged as -d-cR, imag part arranged as -b+aR. */
    int winUp = halfOverlap;
    int winDown = halfOverlap - 1;
    for (; pair < edgePairs; pair++) {
      final int up = upBase + 2 * pair;
      final int down = downBase - 2 * pair;
      folded[2 * pair] = Inlines.MULT16_32_Q15(window[winDown], input[up + half])
          + Inlines.MULT16_32_Q15(window[winUp], input[down]);
      folded[2 * pair + 1] = Inlines.MULT16_32_Q15(window[winUp], input[up])
          - Inlines.MULT16_32_Q15(window[winDown], input[down - half]);
      winUp += 2;
      winDown -= 2;
    }

    /* Middle pairs: copied straight from the input, no window applied. */
    for (; pair < quarter - edgePairs; pair++) {
      folded[2 * pair] = input[downBase - 2 * pair];
      folded[2 * pair + 1] = input[upBase + 2 * pair];
    }

    /* Trailing pairs: real part arranged as a-bR, imag part arranged as -c-dR. */
    winUp = 0;
    winDown = overlap - 1;
    for (; pair < quarter; pair++) {
      final int up = upBase + 2 * pair;
      final int down = downBase - 2 * pair;
      folded[2 * pair] = Inlines.MULT16_32_Q15(window[winDown], input[down])
          - Inlines.MULT16_32_Q15(window[winUp], input[up - half]);
      folded[2 * pair + 1] = Inlines.MULT16_32_Q15(window[winDown], input[up])
          + Inlines.MULT16_32_Q15(window[winUp], input[down + half]);
      winUp += 2;
      winDown -= 2;
    }

    /* Stage 2: pre-rotation, scaling, and scatter into bit-reversed order for the FFT. */
    final short[] bitrev = fft.bitrev;
    for (int k = 0; k < quarter; k++) {
      final short twLow = twiddles[twBase + k];
      final short twHigh = twiddles[twBase + quarter + k];
      final int re = folded[2 * k];
      final int im = folded[2 * k + 1];
      final int rotRe = KissFFT.S_MUL(re, twLow) - KissFFT.S_MUL(im, twHigh);
      final int rotIm = KissFFT.S_MUL(im, twLow) + KissFFT.S_MUL(re, twHigh);
      final int slot = 2 * bitrev[k];
      work[slot] = Inlines.PSHR32(Inlines.MULT16_32_Q16(fftScale, rotRe), fftScaleShift);
      work[slot + 1] = Inlines.PSHR32(Inlines.MULT16_32_Q16(fftScale, rotIm), fftScaleShift);
    }

    /* Stage 3: N/4 complex FFT; scaling was already applied in stage 2. */
    KissFFT.opus_fft_impl(fft, work, 0);

    /* Stage 4: post-rotation, writing one value from the front and one from the back. */
    final int outStep = 2 * stride;
    int outFront = output_ptr;
    int outBack = output_ptr + (stride * (half - 1));
    for (int k = 0; k < quarter; k++) {
      final short twHigh = twiddles[twBase + quarter + k];
      final short twLow = twiddles[twBase + k];
      final int im = work[2 * k + 1];
      final int re = work[2 * k];
      final int rotRe = KissFFT.S_MUL(im, twHigh) - KissFFT.S_MUL(re, twLow);
      final int rotIm = KissFFT.S_MUL(re, twHigh) + KissFFT.S_MUL(im, twLow);
      output[outFront] = rotRe;
      output[outBack] = rotIm;
      outFront += outStep;
      outBack -= outStep;
    }
  }

  /**
   * Fixed-point backward (inverse) MDCT.
   *
   * <p>
   * The coefficients are pre-rotated straight into {@code output} in bit-reversed order, starting
   * at {@code output_ptr + (overlap >> 1)}. {@link KissFFT#opus_fft_impl} then runs in place on
   * that region, the result is post-rotated and de-shuffled in place, and finally the first
   * {@code overlap} entries starting at {@code output_ptr} are mirrored through the window. The
   * entries in front of the FFT region are not produced by the earlier stages, so the mirror step
   * blends whatever values {@code output} already held there on entry.
   *
   * @param l lookup providing the base size {@code n}, the twiddle table {@code trig} and the
   *        per-shift FFT states {@code kfft}
   * @param input source coefficients, read with spacing {@code stride}
   * @param input_ptr offset of the first coefficient inside {@code input}
   * @param output destination buffer, also used as the FFT working area
   * @param output_ptr offset of the first output sample inside {@code output}
   * @param window window coefficients used by the final mirror step
   * @param overlap overlap length; sets both the FFT region offset and the mirrored span
   * @param shift number of times the base size {@code l.n} is halved; also selects
   *        {@code l.kfft[shift]}
   * @param stride distance between consecutive coefficients read from {@code input}
   */
  static void clt_mdct_backward(MDCTLookup l, int[] input, int input_ptr, int[] output,
      int output_ptr,
      int[] window, int overlap, int shift, int stride) {
    /* Every shift level halves the size and skips that level's slice of the twiddle table. */
    int size = l.n;
    int twBase = 0;
    for (int level = 0; level < shift; level++) {
      size >>= 1;
      twBase += size;
    }
    final int half = size >> 1;
    final int quarter = size >> 2;

    /* Stage 1: pre-rotation, reading the input from both ends at once. */
    final int inStep = 2 * stride;
    int front = input_ptr;
    int back = input_ptr + (stride * (half - 1));
    final int fftBase = output_ptr + (overlap >> 1);
    final FFTState fft = l.kfft[shift];
    final short[] bitrev = fft.bitrev;
    final short[] twiddles = l.trig;
    for (int k = 0; k < quarter; k++) {
      /* The rotated pair is stored directly at its bit-reversed position. */
      final int dst = fftBase + 2 * bitrev[k];
      final int lo = twBase + k;
      final int hi = twBase + quarter + k;
      /* Real and imag are swapped because an FFT is used instead of an IFFT. */
      output[dst + 1] = KissFFT.S_MUL(input[back], twiddles[lo])
          + KissFFT.S_MUL(input[front], twiddles[hi]);
      output[dst] = KissFFT.S_MUL(input[front], twiddles[lo])
          - KissFFT.S_MUL(input[back], twiddles[hi]);
      front += inStep;
      back -= inStep;
    }

    /* Stage 2: in-place FFT over the region just filled. */
    KissFFT.opus_fft_impl(fft, output, fftBase);

    /*
     * Stage 3: post-rotate and de-shuffle, working inwards from both ends so the operation can be
     * done in place. The loop count is rounded up so an odd quarter size still covers the middle
     * pair, which is then simply computed twice.
     */
    int lowPair = fftBase;
    int highPair = fftBase + half - 2;
    final int lastLowTw = twBase + quarter - 1;
    final int lastHighTw = twBase + half - 1;
    final int passes = (quarter + 1) >> 1;
    for (int k = 0; k < passes; k++) {
      /* Low pair, read with real and imag swapped (FFT used as IFFT). */
      final int lowRe = output[lowPair + 1];
      final int lowIm = output[lowPair];
      final short lowTwA = twiddles[twBase + k];
      final short lowTwB = twiddles[twBase + quarter + k];
      /* The factor-of-two scale-up is deferred to the window mixing. */
      final int lowOutRe = KissFFT.S_MUL(lowRe, lowTwA) + KissFFT.S_MUL(lowIm, lowTwB);
      final int lowOutIm = KissFFT.S_MUL(lowRe, lowTwB) - KissFFT.S_MUL(lowIm, lowTwA);

      /* The high pair must be fetched before the low results overwrite part of it. */
      final int highRe = output[highPair + 1];
      final int highIm = output[highPair];
      output[lowPair] = lowOutRe;
      output[highPair + 1] = lowOutIm;

      final short highTwA = twiddles[lastLowTw - k];
      final short highTwB = twiddles[lastHighTw - k];
      final int highOutRe = KissFFT.S_MUL(highRe, highTwA) + KissFFT.S_MUL(highIm, highTwB);
      final int highOutIm = KissFFT.S_MUL(highRe, highTwB) - KissFFT.S_MUL(highIm, highTwA);
      output[highPair] = highOutRe;
      output[lowPair + 1] = highOutIm;

      lowPair += 2;
      highPair -= 2;
    }

    /* Stage 4: mirror on both sides for TDAC, pairing entry k with entry overlap - 1 - k. */
    final int mirrorCount = overlap / 2;
    for (int k = 0; k < mirrorCount; k++) {
      final int head = output_ptr + k;
      final int tail = output_ptr + overlap - 1 - k;
      final int winRise = k;
      final int winFall = overlap - 1 - k;
      final int tailVal = output[tail];
      final int headVal = output[head];
      output[head] = Inlines.MULT16_32_Q15(window[winFall], headVal)
          - Inlines.MULT16_32_Q15(window[winRise], tailVal);
      output[tail] = Inlines.MULT16_32_Q15(window[winRise], headVal)
          + Inlines.MULT16_32_Q15(window[winFall], tailVal);
    }
  }
}
